# Build a data frame of per-pundit aggregated pivot-scaling scores (one set of
# columns per topic) and write it to CSV.
library(tidyverse)
library(data.table)
library(dtplyr)
library(parrot)

# Register a multicore foreach backend, leaving one core free.
# library() is used instead of require() so that a missing package stops the
# script here with a clear error instead of a warning followed by a
# "could not find function" failure on the next line.
library(foreach)
library(doMC)
# detectCores() may return NA, and on a single-core machine `- 1` would request
# zero workers; always register at least one.
registerDoMC(cores = max(1L, parallel::detectCores() - 1L, na.rm = TRUE))

source("../scripts/pundits_functions.R")

# Pundit-level metadata table read from CSV.
pundit.meta <- data.table::fread("../pundits_allscales.csv")

# NOTE(review): load() below restores every object stored in each file; if any
# file was saved together with a `.Random.seed`, it would override this seed --
# confirm against how the .RData files were written.
set.seed(1111)

# Load every saved .RData file from the parrot output directory.
# list.files() interprets `pattern` as a regular expression, not a shell glob,
# so match the literal ".RData" extension at the end of the file name.
files <- list.files(path = "../output/parrots_out/",
                    pattern = "\\.RData$")
# A plain for loop (not lapply) so that load() assigns into this environment
# rather than into a throwaway function frame.
for (l in files) {
  load(file.path("../output/parrots_out", l))
}

# Topic label -> `<topic>_parrot` object (presumably brought into scope by the
# load() calls earlier in the script -- confirm). The labels are written once,
# as the list names, and `namevec` is read back from them so the label order
# always matches the object order.
parrots <- list(
  china = china_parrot,
  class = class_parrot,
  climate = climate_parrot,
  conservative = conservative_parrot,
  democrat = democrat_parrot,
  far_left = far_left_parrot,
  far_right = far_right_parrot,
  gender = gender_parrot,
  guns = guns_parrot,
  health_care_insurance = health_care_insurance_parrot,
  immigration = immigration_parrot,
  iran = iran_parrot,
  israel = israel_parrot,
  lgbt = lgbt_parrot,
  liberal = liberal_parrot,
  mueller = mueller_parrot,
  progressive = progressive_parrot,
  race = race_parrot,
  reproductive_health = reproductive_health_parrot,
  republican = republican_parrot,
  taxes_spending = taxes_spending_parrot,
  trade = trade_parrot
)
namevec <- names(parrots)

# Run parrot::score_documents() on every topic's object, requesting 11
# dimensions each time. Element i of `scores` corresponds to element i of
# `parrots`.
# seq_along() (instead of 1:length()) keeps the loop empty when `parrots` is
# empty. The result stays an unnamed list, as before, so downstream summaries
# built from it do not pick up element names.
scores <- lapply(seq_along(parrots), function(topic_idx) {
  parrot::score_documents(
    scores = parrots[[topic_idx]], n_dimensions = 11
  )
})

# Per-topic size summary: number of score rows and number of distinct
# twitter handles, written to CSV.
# vapply() (rather than sapply()) guarantees exactly one integer per topic, so
# an unexpected score object fails loudly instead of silently turning the
# column into a list.
scores_size <- data.frame(
  topic = names(parrots),
  n = vapply(scores, nrow, integer(1)),
  users = vapply(scores, function(s) {
    length(unique(s$twitter_handle))
  }, integer(1))
)
write.csv(scores_size, file = "../output/scores_size.csv")

# For every topic, average each remaining "X..." column within twitter_handle,
# then suffix those columns with the topic label so the per-topic tables can
# be joined side by side without name clashes.
aggs <- lapply(seq_along(scores), function(topic_idx) {
  # Columns whose names start with "X0" are dropped before averaging.
  # summarise already yields one row per twitter_handle, so no extra
  # de-duplication step is needed.
  topic_means <- scores[[topic_idx]] %>%
    dplyr::select(-starts_with("X0")) %>%
    group_by(twitter_handle) %>%
    summarise_at(.vars = dplyr::vars(starts_with("X")),
                 .funs = mean)
  # Rename by column name rather than by position `2:ncol(...)`: with no score
  # columns left, `2:1` would count backwards and corrupt the names.
  is_score <- names(topic_means) != "twitter_handle"
  names(topic_means)[is_score] <-
    paste0(names(topic_means)[is_score], "_", namevec[topic_idx])
  topic_means
})

# Start from the handles listed in the pundit metadata and left-join each
# topic's aggregated scores onto them; handles with no scores for a topic get
# NA in that topic's columns.
pundits.agg <- data.frame(twitter_handle = pundit.meta$twitter_handle)

# seq_along() (instead of 1:length()) skips the loop when `aggs` is empty
# rather than indexing a non-existent element.
for (i in seq_along(aggs)) {
  pundits.agg <- left_join(pundits.agg, aggs[[i]], by = "twitter_handle")
}

# Zero-imputed copy of the aggregated table: in every column whose name starts
# with "X", missing values are replaced by 0. The result is written to CSV.
pundits.agg.imp0 <- mutate_at(
  pundits.agg,
  .vars = dplyr::vars(starts_with("X")),
  .funs = function(column) {
    column[is.na(column)] <- 0
    column
  }
)
data.table::fwrite(pundits.agg.imp0,
                   file = "../output/pundits_parrot_aggregated_imp0.csv")
